## This code replicates the analysis in sections 
## "Party Organization and Party Identification" and "The civil society connection"

## LOGIC OF THE CODE ##
## Loads the dataset composed of pooled survey microdata
## (See authors for details as to how the dataset was assembled)
## Produces the descriptive table presented in supplemental material
## Presents the random effects DiD analysis of effect of opening a branch on ID
## This is done for the PT, PSDB and PMDB sepparately
## In the analysis, produces Figures 3A and 3B (PT and PMDB only)
## At the end, the regression results are collected an pasted into a table (TABLE 3)

library(car)
library(survey)
library(Zelig)
library(ZeligMultilevel)
library(gtools)
library(reshape)
library(sandwich)
library(lmtest)
library(lme4)
library(xtable)
rm(list=ls(all=TRUE))

setwd("~/Dropbox/DATA/Paper-PT/__BJPS_replication")
savedir <- getwd() 

load("data_id.RData")

### Table A3: composition of samples in supplemental information
### Tables report number of RESPONDENTS in selected municipalities
tab13 <- table(openPT=subset(tall,pt2004==F)$open13,
					tt=subset(tall,pt2004==F)$tt)
tab15 <- table(openPMDB=subset(tall,pmdb2004==F)$open15,
					tt=subset(tall,pmdb2004==F)$tt)	
tab45 <- table(openPSDB=subset(tall,psdb2004==F)$open45,
					tt=subset(tall,psdb2004==F)$tt)
tabALL <- rbind(
	PT=c(tab13[1,],tab13[2,]),
	PMDB=c(tab15[1,],tab15[2,]),
	PSDB=c(tab45[1,],tab45[2,]))
print(xtable(cbind(tabALL))) #see below for a combination of this with mun number	

### Now look only at number of identifiers in the above sample
tab13i <- table(openPT=subset(tall,pt2004==F&ptid==T)$open13,
					tt=subset(tall,pt2004==F&ptid==T)$tt)
tab15i <- table(openPMDB=subset(tall,pmdb2004==F&pmdbid==T)$open15,
					tt=subset(tall,pmdb2004==F&pmdbid==T)$tt)	
tab45i <- table(openPSDB=subset(tall,psdb2004==F&psdbid==T)$open45,
					tt=subset(tall,psdb2004==F&psdbid==T)$tt)

#### Compute the number of municipalities
tabMUN <- rbind(
PTm=c(length(unique(subset(tall,pt2004==F&open13==F&tt==0)$codetse)),
	length(unique(subset(tall,pt2004==F&open13==F&tt==1)$codetse)), 
	length(unique(subset(tall,pt2004==F&open13==T&tt==0)$codetse)), 
	length(unique(subset(tall,pt2004==F&open13==T&tt==1)$codetse))),
PMDBm=c(length(unique(subset(tall,pmdb2004==F&open15==F&tt==0)$codetse)),
	length(unique(subset(tall,pmdb2004==F&open15==F&tt==1)$codetse)), 
	length(unique(subset(tall,pmdb2004==F&open15==T&tt==0)$codetse)), 
	length(unique(subset(tall,pmdb2004==F&open15==T&tt==1)$codetse))),
PSDBm=c(length(unique(subset(tall,psdb2004==F&open45==F&tt==0)$codetse)),
	length(unique(subset(tall,psdb2004==F&open45==F&tt==1)$codetse)), 
	length(unique(subset(tall,psdb2004==F&open45==T&tt==0)$codetse)), 
	length(unique(subset(tall,psdb2004==F&open45==T&tt==1)$codetse))))
print(xtable(tabMUN))

#### Generate table A3 in Supplemental Information
print(xtable(cbind(rep(c("Ind.","Mun."),3),
			rbind(tabALL,tabMUN)[c(1,4,2,5,3,6),])))


############################################################################
##### Analysis presented in the paper ######################################

##### Declare functions used in the analysis
cl   <- function(dat,fm, cluster){
             attach(dat, warn.conflicts = F)
	           require(sandwich)
               require(lmtest)
	           M <- length(unique(cluster))
	           N <- length(cluster)
	           K <- fm$rank
	           dfc <- (M/(M-1))*((N-1)/(N-K))
	           uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
	           vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
	           cls <- coeftest(fm, vcovCL) 
             detach(dat)
             return(list(estimates=cls,vcov=vcovCL))}
             
get.olsfecl <- function(x){ #used to assemble results in a table
			out<-x[c(1,nrow(x)),-3]
			out<-c(out[1,],out[2,])
			return(round(out,3))}
get.olsre <- function(x){ #used to assemble results in a table
			require(multcomp)
			options(warn = -1)
			tmp <- cftest(x,vcov(x)) #compute p-values from RE
			tmp <-do.call("cbind", tmp$test[3:6]) 
		    out <- tmp[c(1,nrow(tmp)),-3]
		    out <-c(out[1,],out[2,])
		    return(round(out,3))}

stack.cl <- function(x,ctrl=T){ #x is an output object from cl.
  y <- data.frame(unclass(x))
  tmp<-rbind(y[,"Estimate"],
                    y[,"Std..Error"],
                    y[,"Pr...t.."])
  colnames(tmp) <-  rownames(y)                
  core<-stack(data.frame(round(tmp,3)))
  core$ind <- as.character(core$ind)
  core$type <- rep(c(1:3),nrow(core)/3)
  core<-rbind(core,c(NA,"zN",NA))
  core<-rbind(core,c(NA,"zGroups",NA))
	return(core)}

stack.glmer <- function(x,ctrl=T){ #x is an output object from glmer.
  y <- summary(x)$coefficients
  tmp<-rbind(y[,"Estimate"],
                    y[,"Std. Error"],
                    y[,"Pr(>|z|)"])
  colnames(tmp) <-  rownames(y)                
  core<-stack(data.frame(round(tmp,3)))
  core$ind <- as.character(core$ind)
  core$type <- rep(c(1:3),nrow(core)/3)
  if(ctrl!=T){
  to.keep <- c(grep("Intercept",core$ind),
  				union(grep("tt",core$ind),grep("open\\d\\d",core$ind)))
  core <- core[to.keep,]
  }
  core<-rbind(core,c(summary(x)$devcomp$dims[1],"zN",NA))
  core<-rbind(core,c(summary(x)$ngrps,"zGroups",NA))
   return(core)}

stack.lmer <- function(x,ctrl=T){ #x is a object lmer...
	#lmer objects don't have pvalues
	y <- summary(x)$coefficients
	pvalue <- cftest(x,vcov(x))$test$pvalues  #compute p-value
	y <- cbind(y,pvalue)
	tmp<-rbind(y[,"Estimate"],
                    y[,"Std. Error"],
                    y[,"pvalue"])
  colnames(tmp) <-  rownames(y)                
  core<-stack(data.frame(round(tmp,3)))
  core$ind <- as.character(core$ind)
  core$type <- rep(c(1:3),nrow(core)/3)
  if(ctrl!=T){
  to.keep <- c(grep("Intercept",core$ind),
  				union(grep("tt",core$ind),grep("open\\d\\d",core$ind)))
  core <- core[to.keep,]
  }
  core<-rbind(core,c(summary(x)$devcomp$dims[1],"zN",NA))
  core<-rbind(core,c(summary(x)$ngrps,"zGroups",NA))
   return(core)}

#### DID for electoral results ################################


#### ANALYSIS FOR THE PT (party 13)  ##########################
### Fixed effects with clustered SE 
### Subset original data so that comparison is only between
### municipalities where parties were not present at start of cycle 
the.dat <- na.omit(subset(tall,pt2004==F,	
			select=c(ptid,open13,tt,mun)))
rfe13nc <- lm(ptid~tt*open13,data=the.dat)
rfe13nccl <- cl(rfe13nc$model,rfe13nc,the.dat$mun)[[1]]
rfe13nccl.meta <-  c(Controls=0,N=nrow(rfe13nc$model),#metadata for table
				Muns=length(unique(the.dat$mun)))

### OLS with random effects  (opening a branch)
rre13.1nc  <- lmer(ptid~tt*open13+(tt*open13|mun),data=subset(tall,pt2004==F)) 
rre13.1nc.meta  <- c(Controls=0,N=nrow(rre13.1nc@frame),
					Muns=length(unique(rre13.1nc@frame$mun)))

### Logit with random effecs #compute marginals by hand, Zelig cna't handle it
#lre13.2cz <- zelig(ptid~tt*open13+sex+income+age+tag(tt*open13|mun),
#					data=subset(tall,pt2004==F), model = "logit.mixed",cite=F)
lre13.2c <- glmer(ptid~tt*open13+sex+income+age+(tt*open13|mun),
					data=subset(tall,pt2004==F), family=binomial(link=logit))
	#### Do bootstrapped predicted probabilities for logit random effects by hand!
	library(mvtnorm)
	my.setx <- function(x,tt,open13){ #create simulated values
		#modal categories were computed AS IN zelig's setx
		#see: print(data.frame(setx(lre13.2cz,open13=0,tt=0)$values))
		vals <- c(1,tt,open13,0,rep(0,5),1,rep(0,2),tt*open13)
		names(vals)<- names(fixef(x))
		return(vals )
	}
	x.ctrl.t0 <- my.setx(lre13.2c,open13=0,tt=0)
	x.treat.t0 <- my.setx(lre13.2c,open13=1,tt=0) #create x0, x1
	x.ctrl.t1 <- my.setx(lre13.2c,open13=0,tt=1)
	x.treat.t1 <- my.setx(lre13.2c,open13=1,tt=1) #create x0, x1
	the.coefs <- fixef(lre13.2c)  #using glmer
	the.vcov <- as.matrix(vcov(lre13.2c)) #using glmer
	sims <- rmvnorm(n=10000,mean=as.numeric(the.coefs),sigma=the.vcov) #boostrap
		#Average ranef contribution to predicted value
		revars <- names(ranef(lre13.2c)[[1]]) 		#raneff variable names
		av.re <- apply(ranef(lre13.2c)[[1]],2,mean) #average random effects coefficients
		re.ctrl.t0 <- av.re%*%x.ctrl.t0[revars]
		re.ctrl.t1 <- av.re%*%x.ctrl.t1[revars]
		re.treat.t0 <- av.re%*%x.treat.t0[revars]
		re.treat.t1 <- av.re%*%x.treat.t1[revars]
	pred.ctrl.t0 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t0+re.ctrl.t0)})
	pred.ctrl.t1 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t1+re.ctrl.t1)})
	pred.ctrl.diff <- pred.ctrl.t1-pred.ctrl.t0
	pred.treat.diff <- apply(sims,1,function(x){plogis(x%*%x.treat.t1+re.treat.t1)})-
						apply(sims,1,function(x){plogis(x%*%x.treat.t0+re.treat.t0)})
	ates <- pred.treat.diff-pred.ctrl.diff #compute simulated effects
	ci.effect <- quantile(ates,prob=c(0.025,0.975)) #95% CI
	logit.re13 <- round(c(mean(pred.ctrl.t0),sd(pred.ctrl.t0),
				  sum(pred.ctrl.t0<=0)/length(pred.ctrl.t0),
				  mean(ates),sd(ates),2*sum(ates<=0)/length(ates),
				  Controls=1,nrow(lre13.2c@frame), 
				  length(unique(lre13.2c@frame$mun))),3)
		
### Create table with three results for PT:
### Clustered, linear re, and logit re marginals 
### This is part of Table 3
the.table13 <- cbind( 
	  ClusteredSE=c(get.olsfecl(rfe13nccl),rfe13nccl.meta),
      Rand.Effects=c(get.olsre(rre13.1nc),rre13.1nc.meta),
      Logit.RE=logit.re13) 
row.names(the.table13) <- c(paste(c(rep("Baseline",3),
						rep("Effect",3),""),
						row.names(the.table13)))
the.xtable13 <- xtable(the.table13,digits=3)
caption(the.xtable13)=
	"Effect of Establishing Local Presence on Identification With the PT"
print(the.xtable13,capiton.placement="top",hline.after=c(0,3,6,9))

### Create large table with the three models for appendix 
### This is part of table A5
largetable13 <- merge(merge(
				stack.cl(rfe13nccl),stack.lmer(rre13.1nc),
				by=c("ind","type"),suffixes=c("cl","re"),all=T),
				stack.glmer(lre13.2c),by=c("ind","type"),all=T)
				  

#### ANALYSIS FOR THE PMDB  (party 15) ########################################
#Fixed effects with clustered SE
the.dat <- na.omit(subset(tall,pmdb2004==F,
					select=c(pmdbid,open15,tt,mun)))
rfe15nc <- lm(pmdbid~tt*open15,data=the.dat)
rfe15nccl <- cl(rfe15nc$model,rfe15nc,the.dat$mun)[[1]]
rfe15nccl.meta <-  c(Controls=0,N=nrow(rfe15nc$model),
				Muns=length(unique(the.dat$mun)))

### OLS with random effects  (opening a branch)
rre15.1nc  <- lmer(pmdbid~tt*open15+(tt*open15|mun),data=subset(tall,pmdb2004==F)) 
rre15.1nc.meta  <- c(Controls=0,N=nrow(rre15.1nc@frame),
					Muns=length(unique(rre15.1nc@frame$mun)))
rre15.1c <- lmer(pmdbid~tt*open15+sex+income+age+(tt*open15|mun),
					data=subset(tall,pmdb2004==F))

### Logit with random effecs #compute marginals by hand, Zelig cna't handle it
#lre15.2cz <- zelig(pmdbid~tt*open15+sex+income+age+tag(1+tt*open15|mun),
#					data=subset(tall,pmdb2004==F), model = "logit.mixed",cite=F)
lre15.2c <- glmer(pmdbid~tt*open15+sex+income+age+(1+tt*open15|mun),
					data=subset(tall,pmdb2004==F), family=binomial(link=logit))

#### Do bootstrapped predicted probabilities for logit random effects by hand!
	library(mvtnorm)
	my.setx <- function(x,tt,open15){ #create simulated values
		#modal categories were computed AS IN zelig's setx
		#see: print(data.frame(setx(lre15.2cz,open15=0,tt=0)$values))
		vals <- c(1,tt,open15,0,rep(0,4),1,rep(0,3),tt*open15)
		names(vals)<- names(fixef(x))
		return(vals )
	}
	x.ctrl.t0 <- my.setx(lre15.2c,open15=0,tt=0)
	x.treat.t0 <- my.setx(lre15.2c,open15=1,tt=0) #create x0, x1
	x.ctrl.t1 <- my.setx(lre15.2c,open15=0,tt=1)
	x.treat.t1 <- my.setx(lre15.2c,open15=1,tt=1) #create x0, x1
	the.coefs <- fixef(lre15.2c)  #extract coefficeints
	the.vcov <- as.matrix(vcov(lre15.2c)) #using glmer
	sims <- rmvnorm(n=10000,mean=as.numeric(the.coefs),sigma=the.vcov) #boostrap
		#Average ranef contribution to predicted value
		revars <- names(ranef(lre15.2c)[[1]]) 		#raneff variable names
		av.re <- apply(ranef(lre15.2c)[[1]],2,mean) #average random effects coefficients
		re.ctrl.t0 <- av.re%*%x.ctrl.t0[revars]
		re.ctrl.t1 <- av.re%*%x.ctrl.t1[revars]
		re.treat.t0 <- av.re%*%x.treat.t0[revars]
		re.treat.t1 <- av.re%*%x.treat.t1[revars]
	pred.ctrl.t0 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t0+re.ctrl.t0)})
	pred.ctrl.t1 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t1+re.ctrl.t1)})
	pred.ctrl.diff <- pred.ctrl.t1-pred.ctrl.t0
	pred.treat.diff <- apply(sims,1,function(x){plogis(x%*%x.treat.t1+re.treat.t1)})-
						apply(sims,1,function(x){plogis(x%*%x.treat.t0+re.treat.t0)})
	ates <- pred.treat.diff-pred.ctrl.diff #compute simulated effects
	ci.effect <- quantile(ates,prob=c(0.025,0.975)) #95% CI
	logit.re15 <- round(c(mean(pred.ctrl.t0),sd(pred.ctrl.t0),
				  sum(pred.ctrl.t0<=0)/length(pred.ctrl.t0),
				  mean(ates),sd(ates),2*sum(sign(mean(ates))*ates<=0)/length(ates),
				  Controls=1,nrow(lre15.2c@frame), 
				  length(unique(lre15.2c@frame$mun))),3)
			

### Create table with clustered, linear re, and logit re marginals
### This is part of Table 3
the.table15 <- cbind( 
	  ClusteredSE=c(get.olsfecl(rfe15nccl),rfe15nccl.meta),
      Rand.Effects=c(get.olsre(rre15.1nc),rfe15nccl.meta),
      Logit.RE=logit.re15) 
row.names(the.table15) <- c(paste(c(rep("Baseline",3),
						rep("Effect",3),""),
						row.names(the.table15)))
the.xtable15 <- xtable(the.table15,digits=3)
caption(the.xtable15)=
	"Effect of Establishing Local Presence on Identification With the PMDB"
print(the.xtable15,capiton.placement="top",hline.after=c(0,3,6,9))

### Create raw coefficiets table with three models, for appendix
### This is part of Table A5
largetable15 <- merge(merge(
				stack.cl(rfe15nccl),stack.lmer(rre15.1nc),
				by=c("ind","type"),suffixes=c("cl","re"),all=T),
				stack.glmer(lre15.2c),by=c("ind","type"),all=T)
				  

#### ANALYSIS FOR THE PSDB (party 45) ###################################
#Fixed effects with clustered SE
the.dat <- na.omit(subset(tall,psdb2004==F,
			select=c(psdbid,open45,tt,mun)))
rfe45nc <- lm(psdbid~tt*open45,data=the.dat)
rfe45nccl <- cl(rfe45nc$model,rfe45nc,the.dat$mun)[[1]]
rfe45nccl.meta <-  c(Controls=0,N=nrow(rfe45nc$model),
				Muns=length(unique(the.dat$mun)))

### OLS with random effects  (opening a branch)
rre45.1nc  <- lmer(psdbid~tt*open45+(1+tt*open45|mun),data=subset(tall,psdb2004==F)) 
rre45.1nc.meta  <- c(Controls=0,N=nrow(rre45.1nc@frame),
					Muns=length(unique(rre45.1nc@frame$mun)))
rre45.1c <- lmer(psdbid~tt*open45+sex+income+age+(tt*open45|mun),
					data=subset(tall,psdb2004==F))

### Logit with random effecs #compute marginals by hand, Zelig cna't handle it
#lre45.2cz <- zelig(psdbid~tt*open45+sex+income+age+tag(tt*open45|mun),
#					data=subset(tall,psdb2004==F), model = "logit.mixed",cite=F)
lre45.2c <- glmer(psdbid~tt*open45+sex+income+age+(tt*open45|mun),
					data=subset(tall,psdb2004==F), family=binomial(link=logit))

#### Do bootstrapped predicted probabilities for logit random effects by hand!
	library(mvtnorm)
	my.setx <- function(x,tt,open45){ #create simulated values
		#modal categories were computed AS IN zelig's setx
		#see: print(data.frame(setx(lre45.2cz,open45=0,tt=0)$values))
		vals <- c(1,tt,open45,0,rep(0,4),0,1,rep(0,2),tt*open45)
		names(vals)<- names(fixef(x))
		return(vals )
	}
	x.ctrl.t0 <- my.setx(lre45.2c,open45=0,tt=0)
	x.treat.t0 <- my.setx(lre45.2c,open45=1,tt=0) #create x0, x1
	x.ctrl.t1 <- my.setx(lre45.2c,open45=0,tt=1)
	x.treat.t1 <- my.setx(lre45.2c,open45=1,tt=1) #create x0, x1
	the.coefs <- fixef(lre45.2c)  #using glmer
	the.vcov <- as.matrix(vcov(lre45.2c)) #using glmer
	sims <- rmvnorm(n=10000,mean=as.numeric(the.coefs),sigma=the.vcov) #boostrap
		#Average ranef contribution to predicted value
		revars <- names(ranef(lre45.2c)[[1]]) 		#raneff variable names
		av.re <- apply(ranef(lre45.2c)[[1]],2,mean) #average random effects coefficients
		re.ctrl.t0 <- av.re%*%x.ctrl.t0[revars]
		re.ctrl.t1 <- av.re%*%x.ctrl.t1[revars]
		re.treat.t0 <- av.re%*%x.treat.t0[revars]
		re.treat.t1 <- av.re%*%x.treat.t1[revars]
	pred.ctrl.t0 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t0+re.ctrl.t0)})
	pred.ctrl.t1 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t1+re.ctrl.t1)})
	pred.ctrl.diff <- pred.ctrl.t1-pred.ctrl.t0
	pred.treat.diff <- apply(sims,1,function(x){plogis(x%*%x.treat.t1+re.treat.t1)})-
						apply(sims,1,function(x){plogis(x%*%x.treat.t0+re.treat.t0)})
	ates <- pred.treat.diff-pred.ctrl.diff #compute simulated effects
	ci.effect <- quantile(ates,prob=c(0.025,0.975)) #95% CI
	logit.re45 <- round(c(mean(pred.ctrl.t0),sd(pred.ctrl.t0),
				  sum(pred.ctrl.t0<=0)/length(pred.ctrl.t0),
				  mean(ates),sd(ates),2*sum(sign(mean(ates))*ates<=0)/length(ates),
				  Controls=1,nrow(lre45.2c@frame), 
				  length(unique(lre45.2c@frame$mun))),3)

### Create table with clustered, linear re, and logit re marginals 
### This is part of Table 3
the.table45 <- cbind( 
	  ClusteredSE=c(get.olsfecl(rfe45nccl),rfe45nccl.meta),
      Rand.Effects=c(get.olsre(rre45.1nc),rre45.1nc.meta),
      Logit.RE=logit.re45) 
row.names(the.table45) <- c(paste(c(rep("Baseline",3),
						rep("Effect",3),""),
						row.names(the.table45)))
the.xtable45 <- xtable(the.table15,digits=3)
caption(the.xtable45)=
	"Effect of Establishing Local Presence on Identification With the PSDB"
print(the.xtable45,capiton.placement="top",hline.after=c(0,3,6,9))

### Create raw coefficiets table with three models, for appendix
### This is part of Table A5
largetable45 <- merge(merge(
				stack.cl(rfe45nccl),stack.lmer(rre45.1nc),
				by=c("ind","type"),suffixes=c("cl","re"),all=T),
				stack.glmer(lre45.2c),by=c("ind","type"),all=T)
				  	
				
				
#### COLLECT RESULTS INTO A SINGLE TABLE##########################	
#### Assemble the table with three models for each party!
#### Presented as TABLE 3 in the paper
the.table <- data.frame(pt=the.table13,pmdb=the.table15,psdb=the.table45)
the.xtable <- xtable(the.table,digits=3)
caption(the.xtable)=
	"Effect of Establishing Local Presence on Identification with PT, PMDB, and PSDB"
print(the.xtable,capiton.placement="top",hline.after=c(0,3,6,9))

### Assemble complete LARGE TABLE with complete three models for each party
### Table A.5 in Supplemental Information
largetable <- data.frame(pt=largetable13,
						pmdb=largetable15[,-c(1,2)],
						psdb=largetable45[,-c(1,2)])
	selected.first <- c(
	grep("Intercept",largetable$pt.ind),
	grep("^tt1$",largetable$pt.ind),
	grep("^open\\d\\dTRUE$",largetable$pt.ind,perl=T),
	grep("tt1\\.open\\d\\dTRUE$",largetable$pt.ind))
	selected.last <- c(
	grep("zN",largetable$pt.ind),
	grep("zGroups",largetable$pt.ind))
	others <- setdiff(1:nrow(largetable),c(selected.last,selected.first))	 
largetable <- largetable[c(selected.first,others,selected.last),]
largetable[,2] <- ifelse(largetable[,2]==2,"SE",ifelse(largetable[,2]==3,"p-val",""))
rownames(largetable) <- paste(largetable[,1],largetable[,2],sep="")
largetable <- largetable[,-c(1,2)]
largetable.xtable <- xtable(largetable,digits=3)
caption(largetable.xtable)=
	"Effects of Local Presence on Identification with PT, PMDB, and PSDB (All coefficients)"
print(largetable.xtable,capiton.placement="top",hline.after=c(0,3,6,9))


#### ANALISYS FOR THE CIVIL SOCIETY CONNECTION 
#### Here we add an interaction term between civil society density and opening a branch
#### Plots shown only for PT and PMDB, in paper

### Logit with random effecs adding ngo density for PT
tall$logngodensity <- log(tall$ngodensity.2002+1)
lre13.3c <- glmer(ptid~tt*open13*logngodensity+sex+income+age+(tt*open13|mun),
					data=subset(tall,pt2004==F), family=binomial(link=logit))

#### Do bootstrapped predicted probabilities for logit random effects by hand!
	library(mvtnorm)
	my.setx <- function(x,tt,open13,logngodensity){ #create simulated values
		#modal categories were computed AS IN zelig's setx
		#see: print(data.frame(setx(lre13.2cz,open13=0,tt=0)$values))
		ngo<-logngodensity
		vals <- c(1,tt,open13,ngo,0,rep(0,5),1,rep(0,2),
				tt*open13,tt*ngo,open13*ngo,tt*open13*ngo)
		names(vals)<- names(fixef(x))
		return(vals )
	}
	dens <- seq(min(subset(tall,pt2004==F)$logngodensity,na.rm=T),
				max(subset(tall,pt2004==F)$logngodensity,na.rm=T),len=10)
	#dens <- seq(min(tall$logngodensity,na.rm=T),		#use same scale for PT and PMDB
	#			max(tall$logngodensity,na.rm=T),len=10)
	pred.ngodensity13<- matrix(NA,ncol=11,nrow=length(dens))
	colnames(pred.ngodensity13)=c("Intecept","Intercept.Se","Intercept.P",
									"Ate","Ate.Se","Ate.P",
									"Controls","N","Mun","ci.low","ci.high")
	for(i in 1:length(dens)){#loop over range of logngodensity
	x.ctrl.t0 <- my.setx(lre13.3c,open13=0,tt=0,logngodensity=dens[i])
	x.treat.t0 <- my.setx(lre13.3c,open13=1,tt=0,logngodensity=dens[i]) #create x0, x1
	x.ctrl.t1 <- my.setx(lre13.3c,open13=0,tt=1,logngodensity=dens[i])
	x.treat.t1 <- my.setx(lre13.3c,open13=1,tt=1,logngodensity=dens[i]) #create x0, x1
	the.coefs <- fixef(lre13.3c) #extract coefficeints
	the.vcov <- as.matrix(vcov(lre13.3c)) #extract vcov
	sims <- rmvnorm(n=1000,mean=as.numeric(the.coefs),sigma=the.vcov) #boostrap
		#Average ranef contribution to predicted value
		revars <- names(ranef(lre13.3c)[[1]]) 		#raneff variable names
		av.re <- apply(ranef(lre13.3c)[[1]],2,mean) #average random effects coefficients
		re.ctrl.t0 <- av.re%*%x.ctrl.t0[revars]
		re.ctrl.t1 <- av.re%*%x.ctrl.t1[revars]
		re.treat.t0 <- av.re%*%x.treat.t0[revars]
		re.treat.t1 <- av.re%*%x.treat.t1[revars]
	pred.ctrl.t0 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t0+re.ctrl.t0)})
	pred.ctrl.t1 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t1+re.ctrl.t1)})
	pred.ctrl.diff <- pred.ctrl.t1-pred.ctrl.t0
	pred.treat.diff <- apply(sims,1,function(x){plogis(x%*%x.treat.t1+re.treat.t1)})-
						apply(sims,1,function(x){plogis(x%*%x.treat.t0+re.treat.t0)})
	ates <- pred.treat.diff-pred.ctrl.diff #compute simulated effects
	ci.effect <- quantile(ates,prob=c(0.025,0.975)) #95% CI
	pred.ngodensity13[i,] <-round(c(intercept=mean(pred.ctrl.t0),
					intercept.sd=sd(pred.ctrl.t0),
				  intercept.p=sum(pred.ctrl.t0<=0)/length(pred.ctrl.t0),
				  ate=mean(ates),sd.ate=sd(ates),p.ate=2*sum(ates<=0)/length(ates),
				  Controls=1,N=nrow(lre13.3c@frame), 
				  Mun=length(unique(lre13.3c@frame$mun)),
				  ci.effect),3)
	}
	pred.ngodensity13<-data.frame(logngodensity=dens,pred.ngodensity13)		
		
	### PLOT CHANGE IN EFFECTS WITH NGO DENSITY FOR PT
	### Figure 3.A in the paper
	pdf(file="fig-3a.pdf",height=7,width=7)
	#png(file="fig-3a.png",res=300,height=7,width=7,units="in")
	x<-pred.ngodensity13
	par(mar=c(4,4,1,1))
	plot(x$logngodensity,x$Ate,type="l",xlab="",ylab="",xaxt="n",
		ylim=range(c(x$ci.low,x$ci.high)))
	abline(h=0)
	polygon(x=c(x$logngodensity,x$logngodensity[nrow(x):1]),
			y=c(x$ci.low,x$ci.high[nrow(x):1]),
			,border=NA,col=gray(0.4))
	lines(x$logngodensity,x$Ate,col=gray(0.9))
	axis(side=1,at=subset(tall,pt2004==F)$logngodensity,labels=NA,line=0)
	axis(side=1,at=
		quantile(subset(tall,pt2004==F)$logngodensity,prob=c(0,0.05,0.5,0.95,1),na.rm=T),
				tick=T,labels=c("Low","5th","50th","95th","High"),col=1,line=0)
	mtext("(Log of) NGO Density",side=1,line=2.5)
	mtext("Effect of Establishing Local Presence on Identification with PT",side=2,line=2.5)
	dev.off()
	
	
### Logit with random effecs adding ngo density for PMDB
tall$logngodensity <- log(tall$ngodensity.2002+1)
#lre15.3cz <- zelig(ptid~tt*open15*logngodensity+sex+income+age+tag(tt*open15|mun),
#					data=subset(tall,pmdb2004==F), model = "logit.mixed")
lre15.3c <- glmer(pmdbid~tt*open15*logngodensity+sex+income+age+(tt*open15|mun),
					data=subset(tall,pmdb2004==F), family=binomial(link=logit))

	#### Do bootstrapped predicted probabilities for logit random effects by hand!
	library(mvtnorm)
	my.setx <- function(x,tt,open15,logngodensity){ #create simulated values
		#modal categories were computed AS IN zelig's setx
		#see: print(data.frame(setx(lre13.2cz,open13=0,tt=0)$values))
		ngo<-logngodensity
		vals <- c(1,tt,open15,ngo,0,rep(0,4),1,rep(0,3),
				tt*open15,tt*ngo,open15*ngo,tt*open15*ngo)
		names(vals)<- names(fixef(x))
		return(vals )
	}
	dens <- seq(min(subset(tall,pmdb2004==F)$logngodensity,na.rm=T),
				max(subset(tall,pmdb2004==F)$logngodensity,na.rm=T),len=10)
	#dens <- seq(min(tall$logngodensity,na.rm=T),		#use same scale for PT and PMDB
	#			max(tall$logngodensity,na.rm=T),len=10)
	pred.ngodensity15<- matrix(NA,ncol=11,nrow=length(dens))
	colnames(pred.ngodensity15)=c("Intecept","Intercept.Se","Intercept.P",
									"Ate","Ate.Se","Ate.P",
									"Controls","N","Mun","ci.low","ci.high")
	for(i in 1:length(dens)){#loop over range of logngodensity
	x.ctrl.t0 <- my.setx(lre15.3c,open15=0,tt=0,logngodensity=dens[i])
	x.treat.t0 <- my.setx(lre15.3c,open15=1,tt=0,logngodensity=dens[i]) #create x0, x1
	x.ctrl.t1 <- my.setx(lre15.3c,open15=0,tt=1,logngodensity=dens[i])
	x.treat.t1 <- my.setx(lre15.3c,open15=1,tt=1,logngodensity=dens[i]) #create x0, x1
	the.coefs <- fixef(lre15.3c) #extract coefficeints
	the.vcov <- as.matrix(vcov(lre15.3c)) #extract vcov
	sims <- rmvnorm(n=1000,mean=as.numeric(the.coefs),sigma=the.vcov) #boostrap
		#Average ranef contribution to predicted value
		revars <- names(ranef(lre15.3c)[[1]]) 		#raneff variable names
		av.re <- apply(ranef(lre15.3c)[[1]],2,mean) #average random effects coefficients
		re.ctrl.t0 <- av.re%*%x.ctrl.t0[revars]
		re.ctrl.t1 <- av.re%*%x.ctrl.t1[revars]
		re.treat.t0 <- av.re%*%x.treat.t0[revars]
		re.treat.t1 <- av.re%*%x.treat.t1[revars]
	pred.ctrl.t0 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t0+re.ctrl.t0)})
	pred.ctrl.t1 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t1+re.ctrl.t1)})
	pred.ctrl.diff <- pred.ctrl.t1-pred.ctrl.t0
	pred.treat.diff <- apply(sims,1,function(x){plogis(x%*%x.treat.t1+re.treat.t1)})-
						apply(sims,1,function(x){plogis(x%*%x.treat.t0+re.treat.t0)})
	ates <- pred.treat.diff-pred.ctrl.diff #compute simulated effects
	ci.effect <- quantile(ates,prob=c(0.025,0.975)) #95% CI
	pred.ngodensity15[i,] <-round(c(intercept=mean(pred.ctrl.t0),
					intercept.sd=sd(pred.ctrl.t0),
				  intercept.p=sum(pred.ctrl.t0<=0)/length(pred.ctrl.t0),
				  ate=mean(ates),sd.ate=sd(ates),p.ate=2*sum(ates<=0)/length(ates),
				  Controls=1,N=nrow(lre15.3c@frame), 
				  Mun=length(unique(lre15.3c@frame$mun)),
				  ci.effect),3)
	}
	pred.ngodensity15<-data.frame(logngodensity=dens,pred.ngodensity15)
	
	### PLOT CHANGE IN EFFECTS WITH NGO DENSITY FOR PMDB
	### Figure 3.B in the paper
	#pdf(file="fig-3b.pdf",height=7,width=7)
	png(file="fig-3b.png",res=300,height=7,width=7,units="in")
	x<-pred.ngodensity15
	par(mar=c(4,4,1,1))
	plot(x$logngodensity,x$Ate,type="l",xlab="",ylab="",xaxt="n",
		#ylim=range(c(x$ci.low,x$ci.high))) #use the same scale as for the PT
		ylim=range(c(pred.ngodensity13$ci.low,pred.ngodensity13$ci.high)))
	abline(h=0)
	polygon(x=c(x$logngodensity,x$logngodensity[nrow(x):1]),
			y=c(x$ci.low,x$ci.high[nrow(x):1]),
			,border=NA,col=gray(0.4))
	lines(x$logngodensity,x$Ate,col=gray(0.9))
	axis(side=1,at=subset(tall,pmdb2004==F)$logngodensity,labels=NA,line=0)
	axis(side=1,at=
		quantile(subset(tall,pmdb2004==F)$logngodensity,
				prob=c(0,0.05,0.5,0.95,1),na.rm=T),
				tick=T,labels=c("Low","5th","50th","95th","High"),col=1,line=0)
	mtext("(Log of) NGO Density",side=1,line=2.5)
	mtext("Effect of Establishing Local Presence on Identificaiton with PMDB",side=2,line=2.5)
	dev.off()		

#### Logit with random effecs adding ngo density for PSDB (not in  paper)
lre45.3c <- glmer(psdbid~tt*open45*logngodensity+sex+income+age+(tt*open45|mun),
					data=subset(tall,psdb2004==F), family=binomial(link=logit))

#### Do bootstrapped predicted probabilities for logit random effects by hand!
	library(mvtnorm)
	my.setx <- function(x,tt,open45,logngodensity){ #create simulated values
		ngo<-logngodensity
		vals <- c(1,tt,open45,ngo,0,rep(0,4),0,1,rep(0,2),
				tt*open45,tt*ngo,open45*ngo,tt*open45*ngo)
		names(vals)<- names(fixef(x))
		return(vals )
	}
	dens <- seq(min(subset(tall,psdb2004==F)$logngodensity,na.rm=T),
				max(subset(tall,psdb2004==F)$logngodensity,na.rm=T),len=10)
	pred.ngodensity45<- matrix(NA,ncol=11,nrow=length(dens))
	colnames(pred.ngodensity45)=c("Intecept","Intercept.Se","Intercept.P",
									"Ate","Ate.Se","Ate.P",
									"Controls","N","Mun","ci.low","ci.high")
	for(i in 1:length(dens)){#loop over range of logngodensity
	x.ctrl.t0 <- my.setx(lre45.3c,open45=0,tt=0,logngodensity=dens[i])
	x.treat.t0 <- my.setx(lre45.3c,open45=1,tt=0,logngodensity=dens[i]) #create x0, x1
	x.ctrl.t1 <- my.setx(lre45.3c,open45=0,tt=1,logngodensity=dens[i])
	x.treat.t1 <- my.setx(lre45.3c,open45=1,tt=1,logngodensity=dens[i]) #create x0, x1
	the.coefs <- fixef(lre45.3c) #extract coefficeints
	the.vcov <- as.matrix(vcov(lre45.3c)) #extract vcov
	sims <- rmvnorm(n=1000,mean=as.numeric(the.coefs),sigma=the.vcov) #boostrap
		#Average ranef contribution to predicted value
		revars <- names(ranef(lre45.3c)[[1]]) 		#raneff variable names
		av.re <- apply(ranef(lre45.3c)[[1]],2,mean) #average random effects coefficients
		re.ctrl.t0 <- av.re%*%x.ctrl.t0[revars]
		re.ctrl.t1 <- av.re%*%x.ctrl.t1[revars]
		re.treat.t0 <- av.re%*%x.treat.t0[revars]
		re.treat.t1 <- av.re%*%x.treat.t1[revars]
	pred.ctrl.t0 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t0+re.ctrl.t0)})
	pred.ctrl.t1 <- apply(sims,1,function(x){plogis(x%*%x.ctrl.t1+re.ctrl.t1)})
	pred.ctrl.diff <- pred.ctrl.t1-pred.ctrl.t0
	pred.treat.diff <- apply(sims,1,function(x){plogis(x%*%x.treat.t1+re.treat.t1)})-
						apply(sims,1,function(x){plogis(x%*%x.treat.t0+re.treat.t0)})
	ates <- pred.treat.diff-pred.ctrl.diff #compute simulated effects
	ci.effect <- quantile(ates,prob=c(0.025,0.975)) #95% CI
	pred.ngodensity45[i,] <-round(c(intercept=mean(pred.ctrl.t0),
					intercept.sd=sd(pred.ctrl.t0),
				  intercept.p=sum(pred.ctrl.t0<=0)/length(pred.ctrl.t0),
				  ate=mean(ates),sd.ate=sd(ates),p.ate=2*sum(ates<=0)/length(ates),
				  Controls=1,N=nrow(lre45.3c@frame), 
				  Mun=length(unique(lre45.3c@frame$mun)),
				  ci.effect),3)
	}
	pred.ngodensity45<-data.frame(logngodensity=dens,pred.ngodensity45)
	
	### PLOT CHANGE IN EFFECTS WITH NGO DENSITY FOR PSDB
	### Figure 3.B in the paper
	#pdf(file="fig-psdb.pdf")
	png(file="fig-psdb.png",res=300,height=7,width=7,units="in")
	x<-pred.ngodensity45
	par(mar=c(4,4,1,1))
	plot(x$logngodensity,x$Ate,type="l",xlab="",ylab="",xaxt="n",
		#ylim=range(c(x$ci.low,x$ci.high))) #use the same scale as for the PT
		ylim=range(c(pred.ngodensity13$ci.low,pred.ngodensity13$ci.high)))
	abline(h=0)
	polygon(x=c(x$logngodensity,x$logngodensity[nrow(x):1]),
			y=c(x$ci.low,x$ci.high[nrow(x):1]),
			,border=NA,col=gray(0.4))
	lines(x$logngodensity,x$Ate,col=gray(0.9))
	axis(side=1,at=subset(tall,pmdb2004==F)$logngodensity,labels=NA,line=0)
	axis(side=1,at=
		quantile(subset(tall,pmdb2004==F)$logngodensity,
				prob=c(0,0.05,0.5,0.95,1),na.rm=T),
				tick=T,labels=c("Low","5th","50th","95th","High"),col=1,line=0)
	mtext("(Log of) NGO Density",side=1,line=2.5)
	mtext("Effect of Establishing Local Presence on Identification with PSDB",side=2,line=2.5)
	dev.off()


### Assemble table A6 in Supplemental INformation
civilsoctable <- data.frame(pt=stack.lmer(lre13.3c)[,c(2,3,1)],
						pmdb=stack.lmer(lre15.3c)[,-c(2,3)],
						psdb=stack.lmer(lre45.3c)[,-c(2,3)])			
selected.first <- c(
	grep("Intercept",civilsoctable$ind),
	grep("^tt1$",civilsoctable$ind),
	grep("^logngodensity",civilsoctable$ind),
	grep("^open\\d\\dTRUE",civilsoctable$ind,perl=T),
	grep("tt1\\.logngodensity",civilsoctable$ind),
	grep("tt1\\.open\\d\\dTRUE",civilsoctable$ind))
	selected.last <- c(
	grep("zN",civilsoctable$ind),
	grep("zGroups",civilsoctable$ind))
	others <- setdiff(1:nrow(civilsoctable),c(selected.last,selected.first))	 
civilsoctable <- civilsoctable[c(selected.first,others,selected.last),]
civilsoctable[,2] <- ifelse(civilsoctable[,2]==2,"SE",ifelse(civilsoctable[,2]==3,"p-val",""))
rownames(civilsoctable) <- paste(civilsoctable[,1],civilsoctable[,2],sep="")
civilsoctable <- civilsoctable[,-c(1,2)]
civilsoctable.xtable <- xtable(civilsoctable,digits=3)
caption(civilsoctable.xtable)=
	"Effects of Local Presence and Civil Society Denstiy on Identification with PT and PMDB (All regression coefficients)"
print(civilsoctable.xtable,capiton.placement="top",hline.after=c(0,3,6,9))				  	
